# -*- coding: utf-8 -*-
import scrapy
from scrapy import Request
from zc_core.model.items import Box
from zc_core.util.batch_gen import time_to_batch_no
from cec.rules import *
from datetime import datetime
from zc_core.spiders.base import BaseSpider


class SkuSpider(BaseSpider):
    """Spider that collects the catalog tree and the SKU listing pages.

    Flow: the mall index page is fetched first and handed to
    ``parse_catalog``; every level-3 catalog found there gets a request for
    page 1 of its SKU list; the page-1 response is then used to fan out
    requests for the remaining pages of that catalog.
    """

    name = 'sku'
    # Mall index page; its response is parsed for the catalog tree.
    index_url = 'https://mall.cec-ec.com.cn/index.html'
    # SKU list page template: first slot is the catalog id, second the page.
    sku_list_url = 'https://mall.cec-ec.com.cn/goods_list.html?cat={}&page={}'

    def __init__(self, batchNo=None, *args, **kwargs):
        """Initialise the spider.

        :param batchNo: batch identifier forwarded to ``BaseSpider``.
        """
        super(SkuSpider, self).__init__(batchNo=batchNo, *args, **kwargs)
        # NOTE(review): neither attribute is read anywhere in this class.
        # Confirm whether BaseSpider uses them, or whether max_page_limit
        # was meant to cap the fan-out in parse_sku_content_deal.
        self.page_size = 20
        self.max_page_limit = 100

    def _build_list_req(self, catalog_name, catalog_id, callback, page):
        """Build the request for one page of a catalog's SKU list.

        :param catalog_name: catalog name, carried in ``meta`` for the callback.
        :param catalog_id: catalog id, substituted into the list URL.
        :param callback: callable that receives the response.
        :param page: page number to request.
        :return: a ``Request`` for that list page.
        """
        return Request(
            url=self.sku_list_url.format(catalog_id, page),
            meta={
                'reqType': 'sku',
                'batchNo': self.batch_no,
                'page': page,
                'catalogName': catalog_name,
                'catalogId': catalog_id,
            },
            callback=callback,
            errback=self.error_back,
            # Bypass Scrapy's duplicate-request filter for list pages.
            dont_filter=True
        )

    def start_requests(self):
        """Yield the single seed request for the mall index page."""
        yield Request(
            url=self.index_url,
            meta={
                'batchNo': self.batch_no,
            },
            callback=self.parse_catalog,
            errback=self.error_back,
            dont_filter=True,
        )

    def parse_catalog(self, response):
        """Handle the index page: emit the catalogs and seed SKU list requests.

        Yields one ``Box`` holding all parsed catalogs, then one page-1 list
        request for every catalog whose ``level`` is 3.
        """
        # The bare name resolves to the module-level helper (presumably
        # provided by ``from cec.rules import *``), not to this method.
        cats = parse_catalog(response)
        if not cats:
            # Nothing parsed (None or empty): stop instead of iterating None.
            return

        self.logger.info('品类: count[%s]', len(cats))
        yield Box('catalog', self.batch_no, cats)

        for cat in cats:
            if cat.get('level') != 3:
                continue
            # Only level-3 catalogs are crawled for SKU lists.
            yield self._build_list_req(
                cat.get('catalogName'),
                cat.get('catalogId'),
                self.parse_sku_content_deal,
                1,
            )

    def parse_sku_content_deal(self, response):
        """Handle one SKU list page.

        Yields a ``Box`` with the SKUs parsed from the page (if any). For
        page 1 only, also reads the total page count and yields requests for
        pages 2..total.
        """
        meta = response.meta
        catalog_page = meta.get('page')
        catalog_id = meta.get('catalogId')
        catalog_name = meta.get('catalogName')

        sku_list = parse_sku(response)
        if sku_list:
            yield Box('sku', self.batch_no, sku_list)
        else:
            self.logger.info('分页为空:cat=%s,page=%s', catalog_id, catalog_page)

        if catalog_page != 1:
            # The remaining pages were already scheduled from page 1.
            return

        total_page = parse_total_page(response)
        self.logger.info('总页数: cat=%s, page=%s', catalog_id, total_page)
        if not total_page:
            # No page count could be parsed; there is nothing to fan out and
            # ``total_page + 1`` would raise on None.
            return

        for page in range(2, total_page + 1):
            yield self._build_list_req(catalog_name, catalog_id, self.parse_sku_content_deal, page)
